######## snakemake preamble start (automatically inserted, do not edit) ########
import sys; sys.path.extend(['/biggin/b147/univ4859/miniconda3/envs/snakemake/lib/python3.9/site-packages', '/biggin/b147/univ4859/research/03_macroconf/notebooks']); import pickle; snakemake = pickle.loads(b'\x80\x04\x95\xe2"\x00\x00\x00\x00\x00\x00\x8c\x10snakemake.script\x94\x8c\tSnakemake\x94\x93\x94)\x81\x94}\x94(\x8c\x05input\x94\x8c\x0csnakemake.io\x94\x8c\nInputFiles\x94\x93\x94)\x81\x94(\x8cRdata/interim/refactor-test/22/H2O/11_GaMD_full/2000/0/ed6dd3148ef9b069_weights.dat\x94\x8cMdata/interim/refactor-test/22/H2O/11_GaMD_full/2000/0/ed6dd3148ef9b069_md.out\x94\x8cRdata/interim/refactor-test/22/H2O/11_GaMD_full/2000/0/ed6dd3148ef9b069_traj.netcdf\x94\x8cPdata/interim/refactor-test/22/H2O/11_GaMD_full/2000/0/ed6dd3148ef9b069_traj.ncdf\x94\x8c?data/interim/refactor-test/22/H2O/1_make_topology/mc_sol.prmtop\x94\x8c\'data/interim/refactor-test/22/data.json\x94\x8c&data/interim/refactor-test/22/NOE.json\x94e}\x94(\x8c\x06_names\x94}\x94(\x8c\x07weights\x94K\x00N\x86\x94\x8c\x03out\x94K\x01N\x86\x94\x8c\x04traj\x94K\x02N\x86\x94\x8c\ttraj_ncdf\x94K\x03N\x86\x94\x8c\x03top\x94K\x04N\x86\x94\x8c\x04parm\x94K\x05N\x86\x94\x8c\x03noe\x94K\x06N\x86\x94u\x8c\x12_allowed_overrides\x94]\x94(\x8c\x05index\x94\x8c\x04sort\x94eh$\x8c\tfunctools\x94\x8c\x07partial\x94\x93\x94h\x06\x8c\x19Namedlist._used_attribute\x94\x93\x94\x85\x94R\x94(h*)}\x94\x8c\x05_name\x94h$sNt\x94bh%h(h*\x85\x94R\x94(h*)}\x94h.h%sNt\x94bh\x14h\nh\x16h\x0bh\x18h\x0ch\x1ah\rh\x1ch\x0eh\x1eh\x0fh h\x10ub\x8c\x06output\x94h\x06\x8c\x0bOutputFiles\x94\x93\x94)\x81\x94(\x8cVdata/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_pca_dihed.png\x94\x8cPdata/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_noe.png\x94\x8cTdata/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_noe_pmf.png\x94\x8cYdata/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_cluster_plot.png\x94\x8cXdata/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_cluster_pca.png\x94\x8c]data/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_cluster_min_samp.png\x94\x8cYdata/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_cluster_time.png\x94\x8c\\data/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_cluster_structs.png\x94\x8cZdata/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_noe_stat_plot.png\x94\x8c^data/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_clusters/clusters.pdb\x94\x8cXdata/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_noe_result.json\x94\x8cWdata/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_noe_stats.json\x94\x8cVdata/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_dihedrals.dat\x94\x8cQdata/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_dPCA.dat\x94\x8c\\data/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_dPCA_weights_MC.dat\x94\x8cUdata/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_NOE_dist.dat\x94\x8cUdata/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_multiple.dat\x94e}\x94(h\x12}\x94(\x8c\x08pca_dihe\x94K\x00N\x86\x94\x8c\x08noe_plot\x94K\x01N\x86\x94\x8c\x07noe_pmf\x94K\x02N\x86\x94\x8c\x0ccluster_plot\x94K\x03N\x86\x94\x8c\x0bcluster_pca\x94K\x04N\x86\x94\x8c\x10cluster_min_samp\x94K\x05N\x86\x94\x8c\x0ccluster_time\x94K\x06N\x86\x94\x8c\x0fcluster_structs\x94K\x07N\x86\x94\x8c\rnoe_stat_plot\x94K\x08N\x86\x94\x8c\x0bcluster_pdb\x94K\tN\x86\x94\x8c\nnoe_result\x94K\nN\x86\x94\x8c\tnoe_stats\x94K\x0bN\x86\x94\x8c\tdihedrals\x94K\x0cN\x86\x94\x8c\x04dPCA\x94K\rN\x86\x94\x8c\x0fdPCA_weights_MC\x94K\x0eN\x86\x94\x8c\x08noe_dist\x94K\x0fN\x86\x94\x8c\x08multiple\x94K\x10N\x86\x94uh"]\x94(h$h%eh$h(h*\x85\x94R\x94(h*)}\x94h.h$sNt\x94bh%h(h*\x85\x94R\x94(h*)}\x94h.h%sNt\x94bhKh8hMh9hOh:hQh;hSh<hUh=hWh>hYh?h[h@h]hAh_hBhahChchDhehEhghFhihGhkhHub\x8c\x06params\x94h\x06\x8c\x06Params\x94\x93\x94)\x81\x94(\x8cRdata/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_clusters/\x94\x8c\x04GaMD\x94e}\x94(h\x12}\x94(\x8c\x0bcluster_dir\x94K\x00N\x86\x94\x8c\x06method\x94K\x01N\x86\x94uh"]\x94(h$h%eh$h(h*\x85\x94R\x94(h*)}\x94h.h$sNt\x94bh%h(h*\x85\x94R\x94(h*)}\x94h.h%sNt\x94bh~hzh\x80h{ub\x8c\twildcards\x94h\x06\x8c\tWildcards\x94\x93\x94)\x81\x94(\x8c\rrefactor-test\x94\x8c\x0222\x94\x8c\x03H2O\x94\x8c\x042000\x94\x8c\x010\x94\x8c\x10ed6dd3148ef9b069\x94e}\x94(h\x12}\x94(\x8c\x08exp_name\x94K\x00N\x86\x94\x8c\x0ccompound_dir\x94K\x01N\x86\x94\x8c\x07solvent\x94K\x02N\x86\x94\x8c\x04time\x94K\x03N\x86\x94\x8c\x06repeat\x94K\x04N\x86\x94\x8c\x05index\x94K\x05N\x86\x94uh"]\x94(h$h%eh$h\x94h%h(h*\x85\x94R\x94(h*)}\x94h.h%sNt\x94b\x8c\x08exp_name\x94h\x8fh\x99h\x90\x8c\x07solvent\x94h\x91\x8c\x04time\x94h\x92\x8c\x06repeat\x94h\x93ub\x8c\x07threads\x94K\x06\x8c\tresources\x94h\x06\x8c\tResources\x94\x93\x94)\x81\x94(K\x06K\x01\x8c\x04/tmp\x94e}\x94(h\x12}\x94(\x8c\x06_cores\x94K\x00N\x86\x94\x8c\x06_nodes\x94K\x01N\x86\x94\x8c\x06tmpdir\x94K\x02N\x86\x94uh"]\x94(h$h%eh$h(h*\x85\x94R\x94(h*)}\x94h.h$sNt\x94bh%h(h*\x85\x94R\x94(h*)}\x94h.h%sNt\x94bh\xb4K\x06h\xb6K\x01h\xb8h\xb1ub\x8c\x03log\x94h\x06\x8c\x03Log\x94\x93\x94)\x81\x94\x8c_data/processed/refactor-test/notebooks/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_GaMD_processed.ipynb\x94a}\x94(h\x12}\x94\x8c\x08notebook\x94K\x00N\x86\x94sh"]\x94(h$h%eh$h(h*\x85\x94R\x94(h*)}\x94h.h$sNt\x94bh%h(h*\x85\x94R\x94(h*)}\x94h.h%sNt\x94bh\xcah\xc7ub\x8c\x06config\x94}\x94(\x8c\tcompounds\x94]\x94(K8K\x18K\x16K7e\x8c\x10cheminfo_confgen\x94]\x94\x8c\x05omega\x94a\x8c\x11confgen_paramters\x94}\x94\x8c\x05omega\x94]\x94(\x8c\x05basic\x94\x8c\nchloroform\x94es\x8c\thash_list\x94]\x94(]\x94(\x8c\x1094214a84a78d8392\x94\x8c\x10ea902b72328019c4\x94\x8c\x1005834cbd8c236ca0\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x10f85eb31757da7e52\x94\x8c\x1005834cbd8c236ca0\x94\x8c\x109eb48310b74aa061\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x1010236f52ef18b2a3\x94\x8c\x1015ff3a3813ba7460\x94\x8c\x10e36fd3c2b58d633f\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x10c530212023f40ad9\x94\x8c\x1010236f52ef18b2a3\x94\x8c\x1094214a84a78d8392\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x100a303e31ccdf82a5\x94\x8c\x10c530212023f40ad9\x94\x8c\x1010236f52ef18b2a3\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x100a303e31ccdf82a5\x94\x8c\x106223de6c169344e9\x94\x8c\x10f85eb31757da7e52\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x100a303e31ccdf82a5\x94\x8c\x1015ff3a3813ba7460\x94\x8c\x10e36fd3c2b58d633f\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x106223de6c169344e9\x94\x8c\x10f85eb31757da7e52\x94\x8c\x10b3332ce08307c920\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x10b3332ce08307c920\x94\x8c\x101f323341d36478d5\x94\x8c\x10f8f83124ccdc7c13\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x10e36fd3c2b58d633f\x94\x8c\x10f8f83124ccdc7c13\x94\x8c\x10282f14929802f7ac\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x100a303e31ccdf82a5\x94\x8c\x10e36fd3c2b58d633f\x94\x8c\x10f8f83124ccdc7c13\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x100a303e31ccdf82a5\x94\x8c\x109eb48310b74aa061\x94\x8c\x10282f14929802f7ac\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x105ad86a1e5e790925\x94\x8c\x107e4cdc50cc70fc15\x94\x8c\x104574de591364a01f\x94\x8c\x0255\x94h\x93h\x93e]\x94(\x8c\x105ad86a1e5e790925\x94\x8c\x10395e86cd79c14300\x94\x8c\x10938ec71731842389\x94\x8c\x0255\x94h\x93h\x93e]\x94(\x8c\x108374da5ea9cc35c0\x94\x8c\x100a303e31ccdf82a5\x94\x8c\x1010236f52ef18b2a3\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x10ed6dd3148ef9b069\x94\x8c\x108374da5ea9cc35c0\x94\x8c\x100a303e31ccdf82a5\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x10ed6dd3148ef9b069\x94\x8c\x10c530212023f40ad9\x94\x8c\x1010236f52ef18b2a3\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x103d00b0964201e088\x94\x8c\x1009a11fd633f3a72b\x94\x8c\x106a9359deeeab1adc\x94\x8c\x0224\x94h\x93h\x93e]\x94(\x8c\x103d00b0964201e088\x94\x8c\x105d7022b409750c68\x94\x8c\x10e92cb0e6ed121828\x94\x8c\x0224\x94h\x93h\x93e]\x94(\x8c\x10242ee4d4af3634ed\x94\x8c\x10660d1badff741862\x94\x8c\x105ad86a1e5e790925\x94\x8c\x0255\x94h\x93h\x93e]\x94(\x8c\x10ed6dd3148ef9b069\x94\x8c\x108374da5ea9cc35c0\x94\x8c\x103c523c7ca380f925\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x108374da5ea9cc35c0\x94\x8c\x100a303e31ccdf82a5\x94\x8c\x103c523c7ca380f925\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x10ed6dd3148ef9b069\x94\x8c\x109a100624325ec4cd\x94\x8c\x109eb48310b74aa061\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x10242ee4d4af3634ed\x94\x8c\x106163f1ecc9f26d15\x94\x8c\x10938ec71731842389\x94\x8c\x0255\x94\x8c\x05omega\x94\x8c\x05basic\x94e]\x94(\x8c\x10ed6dd3148ef9b069\x94\x8c\x10db88602b153df078\x94\x8c\x10c74338f1c2e97967\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x1004326f349933814b\x94\x8c\x10db88602b153df078\x94\x8c\x10c74338f1c2e97967\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x10ed6dd3148ef9b069\x94\x8c\x10bbe75067f45d5efd\x94\x8c\x109a100624325ec4cd\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x102088374558ed79c5\x94\x8c\x10b269c69e29e2b3a6\x94\x8c\x10d7cd9d09c7724d89\x94\x8c\x0256\x94\x8c\x05omega\x94\x8c\x05basic\x94e]\x94(\x8c\x108374da5ea9cc35c0\x94\x8c\x10b42d1cec1349e562\x94\x8c\x103d110be4b2d63f96\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x10ed6dd3148ef9b069\x94\x8c\x101a8e577036845661\x94\x8c\x10bbe75067f45d5efd\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x10ed6dd3148ef9b069\x94\x8c\x103d110be4b2d63f96\x94\x8c\x10b42d1cec1349e562\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x10ed6dd3148ef9b069\x94\x8c\x108374da5ea9cc35c0\x94\x8c\x103c523c7ca380f925\x94\x8c\x0222\x94\x8c\x05omega\x94\x8c\x05basic\x94e]\x94(\x8c\x10ed6dd3148ef9b069\x94\x8c\x108374da5ea9cc35c0\x94\x8c\x105eedf239922efaee\x94\x8c\x0222\x94\x8c\x05omega\x94\x8c\x05basic\x94e]\x94(\x8c\x103d00b0964201e088\x94\x8c\x1009a11fd633f3a72b\x94\x8c\x105d7022b409750c68\x94\x8c\x0224\x94\x8c\x05omega\x94\x8c\x05basic\x94e]\x94(\x8c\x10459a7f09ce68039a\x94\x8c\x103fe88b3d3ed6d9d3\x94\x8c\x10d236cb661a6d3813\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x10ed6dd3148ef9b069\x94\x8c\x10459a7f09ce68039a\x94\x8c\x101a8e577036845661\x94\x8c\x0222\x94h\x93h\x93e]\x94(\x8c\x10ed6dd3148ef9b069\x94\x8c\x108374da5ea9cc35c0\x94\x8c\x103c523c7ca380f925\x94\x8c\x0222\x94\x8c\x05omega\x94\x8c\nchloroform\x94e]\x94(\x8c\x10ed6dd3148ef9b069\x94\x8c\x10459a7f09ce68039a\x94\x8c\x10d236cb661a6d3813\x94\x8c\x0222\x94\x8c\x05omega\x94\x8c\nchloroform\x94e]\x94(\x8c\x10ed6dd3148ef9b069\x94\x8c\x108374da5ea9cc35c0\x94\x8c\x10d236cb661a6d3813\x94\x8c\x0222\x94\x8c\x05omega\x94\x8c\x05basic\x94e]\x94(\x8c\x10ed6dd3148ef9b069\x94\x8c\x108374da5ea9cc35c0\x94\x8c\x103c523c7ca380f925\x94\x8c\x0222\x94\x8c\x05rdkit\x94\x8c\x05basic\x94e]\x94(\x8c\x103d00b0964201e088\x94\x8c\x1009a11fd633f3a72b\x94\x8c\x105d7022b409750c68\x94\x8c\x0224\x94\x8c\x05rdkit\x94\x8c\x05basic\x94e]\x94(\x8c\x10242ee4d4af3634ed\x94\x8c\x106163f1ecc9f26d15\x94\x8c\x10938ec71731842389\x94\x8c\x0255\x94\x8c\x05rdkit\x94\x8c\x05basic\x94e]\x94(\x8c\x102088374558ed79c5\x94\x8c\x10b269c69e29e2b3a6\x94\x8c\x10d7cd9d09c7724d89\x94\x8c\x0256\x94\x8c\x05rdkit\x94\x8c\x05basic\x94ee\x8c\x06stride\x94K\n\x8c\x0ccluster_conf\x94}\x94(\x8c\x10242ee4d4af3634ed\x94K\x0f\x8c\x108374da5ea9cc35c0\x94K\n\x8c\x103c523c7ca380f925\x94K\n\x8c\x10ca1a37290d9e454e\x94K\x08\x8c\x109a100624325ec4cd\x94K\x06\x8c\x103d00b0964201e088\x94K\n\x8c\x10ed6dd3148ef9b069\x94K\x08u\x8c\x04ns_h\x94G@)\x00\x00\x00\x00\x00\x00\x8c\x08exp_name\x94\x8c\rrefactor-test\x94\x8c\tdata_name\x94\x8c\x1722-02-2021_MacroConf-v2\x94\x8c\x0bsample_file\x94\x8c\x0bsamples.tsv\x94\x8c\rsample_output\x94\x8c\x0fsamples_old.tsv\x94\x8c\nforcefield\x94\x8c1libs/forcefields/leaprc.protein.ff14SB_noterminal\x94\x8c\x0bDMSO_params\x94\x8c!libs/md_solvents/dmso/frcmod.dmso\x94\x8c\x08DMSO_box\x94\x8c!libs/md_solvents/dmso/dmsobox.off\x94\x8c\x11Chloroform_params\x94\x8c\x0cfrcmod.chcl3\x94\x8c\x16clustering_min_samples\x94}\x94(\x8c\x0222\x94K\n\x8c\x011\x94K"u\x8c\x0eaMD_clustering\x94}\x94(\x8c\x0256\x94K\x0c\x8c\x0255\x94K\n\x8c\x0222\x94K\x07u\x8c\x0ecMD_clustering\x94}\x94(\x8c\x0222\x94K\n\x8c\x0224\x94K\x07\x8c\x0255\x94K\x0f\x8c\x0256\x94K\x0bu\x8c\x0fGaMD_clustering\x94}\x94(\x8c\x0222\x94K\x0c\x8c\x0224\x94K\n\x8c\x0256\x94K u\x8c\nmd_methods\x94]\x94(\x8c\x03cMD\x94\x8c\x04GaMD\x94\x8c\x03aMD\x94e\x8c\x0bcMD_repeats\x94K\x01\x8c\x08cMD_time\x94Kd\x8c\x0baMD_repeats\x94K\x01\x8c\x08aMD_time\x94Kd\x8c\x0cGaMD_repeats\x94K\x01\x8c\tGaMD_time\x94Kd\x8c\x08md_times\x94N\x8c\x04em_1\x94X\xc7\x01\x00\x00Minimize water\n System minimization.\n&cntrl\n   imin=1, ntmin=1, !(Invoke Minimization)\n   ntx=1, irest=0, !(read init. coords, no restart)\n   maxcyc=20000, ncyc=15000, !(max. cycles switch to conj. grad.)\n   drms=0.1, !(convergence criterion)\n   ntpr=100, ntwr=100, iwrap=0, !(outputs)\n   ntf=1, !(force eval all)\n   cut=8.0, !(non-bonded cutoff)\n   ntr=1, !(cartesian restraints)\n   restraintmask="!:WAT", restraint_wt=10.0, !(restrain all but water)\n /\n\x94\x8c\x04em_2\x94X\xb3\x01\x00\x00Relax water\n LET WATER MOVE\n&cntrl\n  imin=0,\n  ntx=1, irest=0,\n  ntpr=500, ntwx=500, ntwv=0, ntwe=0,\n  ntwr=5000, !(outputs)\n  t=0.0, dt=0.002, !(timestep)\n  nstlim=10000, iwrap=1, !(steps, wrap to box)\n  ntt=1, !(temp. control)\n  temp0=300.0, tempi=200.0, tautp=0.5,\n  ntp=1, pres0=1.0 !(pressure control)\n  ntc=2, ntf=2, !(SHAKE)\n  ntr=1, !(cart. restraints)\n  restraintmask="!:WAT" , restraint_wt=10.0, !(restrain all but water)\n /\n\x94u\x8c\x04rule\x94\x8c\x0cmd_GaMD_anal\x94\x8c\x0fbench_iteration\x94N\x8c\tscriptdir\x94\x8c5/biggin/b147/univ4859/research/03_macroconf/notebooks\x94ub.'); from snakemake.logging import logger; logger.printshellcmds = False; import os; os.chdir(r'/biggin/b147/univ4859/research/03_macroconf');
######## snakemake preamble end #########

1.4. Analysis Notebook¶

A copy of this notebook is run to analyse the molecular dynamics simulations.

This notebook refers to compound 22. The simulation type is GaMD, 2000 ns. The simulation was performed in H2O
# show a 2d image of the molecule, and a 3d structure via py3dmol!
# also put sequence here.
#compute rmsd for different atom types
rmsds = md.rmsd(t, t, 0)
bo = topo.topology.select("protein and (backbone and name O)")
ca = topo.topology.select("name CA")
rmsds_ca =  md.rmsd(t,t,0,atom_indices=ca) * 10  # Convert to Angstrom!
rmsds_bo = md.rmsd(t,t,0,atom_indices=bo) * 10  # Convert to Angstrom!

# rmsds_ca = rmsds_ca[::100]
# rmsds_bo = rmsds_bo[::100]

# make this a bokeh plot (with dropdown to choose which atom type to consider for rmsd!)
plt.plot(rmsds_ca, label='CA')
plt.plot(rmsds_bo, label='BO')
plt.legend()
plt.title('RMSD of different atoms over the simulation time')
plt.xlabel('simulation time')
plt.ylabel('RMSD in A relative to the initial frame')
plt.show()
../_images/ed6dd3148ef9b069_GaMD_processed_8_0.png
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import Slider, CheckboxGroup, CustomJS, ColumnDataSource, CDSView, CheckboxButtonGroup
from bokeh.models.filters import CustomJSFilter
from bokeh.layouts import row
from bokeh.transform import factor_cmap
from bokeh.palettes import Category10_10
output_notebook()
Loading BokehJS ...
# RMSF might be interesting as well!
rmsf_ca = md.rmsf(t,t,0, ca)
rmsf_bo = md.rmsf(t,t,0, bo)

plt.plot(rmsf_ca, label='CA')
plt.plot(rmsf_bo, label='BO')
plt.legend()
plt.title('RMSF for different atom types')
plt.show()
../_images/ed6dd3148ef9b069_GaMD_processed_12_0.png
compound = json_load(snakemake.input.parm)
multi = compound.multi
multi
if multi is not None:
    multi = {v: k for k, v in multi.items()}
    multiple = True
    distinction = compound.distinction
    print("Multiple compounds detected")
else:
    multiple = False
    pickle_dump(snakemake.output.multiple, multiple)
if multiple:  # if Compound.cistrans:
    ca_c = t.top.select(f"resid {distinction[0]} and name CA C")
    n_ca_next = t.top.select(f"resid {distinction[1]} and name N CA")
    omega = np.append(ca_c, n_ca_next)
    t_omega_rad = md.compute_dihedrals(t, [omega])
    t_omega_deg = np.abs(np.degrees(t_omega_rad))
    plt.plot(t_omega_deg)
    plt.hlines(90, 0, t.n_frames, color='red')
    plt.xlabel('Frames')
    plt.ylabel('Omega 0-1 [°]')
    plt.title(f"Dihedral angle over time. Compound {compound_index}")
    cis = np.where(t_omega_deg <= 90)[0]
    trans = np.where(t_omega_deg > 90)[0]
    pickle_dump(snakemake.output.multiple, (cis,trans))
    #t[trans]
# Make this interactive with bokeh -> click on legend hides angles. also make legend, s.t. it says the res-name e.g. G1, X2, Asp-3
omega = getOmega(t)
omega_deg = np.abs(np.degrees(omega))
plt.plot(omega_deg)
plt.title(f"Omega angles over time. Compound {compound_index}")
Text(0.5, 1.0, 'Omega angles over time. Compound 22')
../_images/ed6dd3148ef9b069_GaMD_processed_15_1.png
phi, psi, omega = getDihedrals(t)
print(np.degrees(src.analyse.angle_mean(phi)), np.degrees(src.analyse.angle_mean(psi)), np.degrees(src.analyse.angle_mean(omega)))
[-137.8573    -74.341484  -80.85069   -95.27573  -115.06232 ] [113.09379 -36.22547 -59.45217 -45.72217 -46.09534] [  -7.7789993 -169.58467    176.77768    164.16508    177.1614   ]

1.4.1. Principle Component Analysis (PCA)¶

# make pca as tabs for report!

1.4.1.1. Cartesian PCA¶

c_pca, reduced_cartesian = make_PCA(t, 'cartesian')
fig, ax = plt.subplots()
plt.close()
ax = plot_PCA(reduced_cartesian, 'cartesian', compound_index)
../_images/ed6dd3148ef9b069_GaMD_processed_21_0.png

1.4.1.2. Pairwise distances PCA¶

# pd_pca, reduced_pd = make_PCA(t, 'pairwise_d')
# fig, ax = plt.subplots()
# plt.close()
# ax = plot_PCA(reduced_pd, 'cartesian', compound_index, t.time, 'Time')

1.4.1.3. Dihedral PCA¶

pca_d, reduced_dihedrals = make_PCA(t, 'dihedral')
reduced_dihedrals_full = getDih(t)

# save pca object & reduced dihedrals
pickle_dump(snakemake.output.dPCA, pca_d)
pickle_dump(snakemake.output.dihedrals, reduced_dihedrals_full)

# reweighting:
if snakemake.params.method == "cMD":
    d_weights = reweight(reduced_dihedrals, None, 'noweight')
else:
    weight_data = np.loadtxt(snakemake.input.weights)
    weight_data = weight_data[::stride]
    d_weights = reweight(reduced_dihedrals, snakemake.input.weights, 'amdweight_MC', weight_data)
    #d_weights = reduced_dihedrals[:,0] * 0
if multiple:
    fig, axs = plt.subplots(1,2, sharex='all', sharey='all')
    fig.set_figwidth(10)
    #plt.close()
    axs[0] = plot_PCA(reduced_dihedrals, 'dihedral', compound_index, d_weights, 'Energy [kcal/mol]', fig, axs[0])
    axs[1] = src.pca.plot_PCA_citra(reduced_dihedrals[cis], reduced_dihedrals[trans], 'dihedral', compound_index, [multi['cis'] + ' (cis)', multi['trans'] + ' (trans)'], fig, axs[1])
    fig.savefig(snakemake.output.pca_dihe)
else:
    fig, ax = plt.subplots()
    ax = plot_PCA(reduced_dihedrals, 'dihedral', compound_index, d_weights, 'Energy [kcal/mol]', fig, ax)
    fig.savefig(snakemake.output.pca_dihe)

pickle_dump(snakemake.output.dPCA_weights_MC, d_weights)
../_images/ed6dd3148ef9b069_GaMD_processed_25_0.png
# if compound_index == 22:
#     result = np.random.rand(2,10000)

#     for i in range(10000):
#         phi = [-173, -58, -102, -104, -77]
#         psi = [-171, -12, -77, -101, 46]
#         omega = (np.random.rand(1,5) - 0.5) * 2 * 180
#         #omega = [170, 170, 170, 170, 170]

#         d1 = np.cos(np.radians(phi))
#         d2 = np.sin(np.radians(phi))
#         d3 = np.cos(np.radians(psi))
#         d4 = np.sin(np.radians(psi))
#         d5 = np.cos(np.radians(omega[0]))
#         d6 = np.sin(np.radians(omega[0]))

#         manual = np.hstack((d1,d2,d3,d4,d5,d6))
#         result[:,i]= (pca_d.transform([manual]))

#     ax.scatter(result[0,:], result[1,:])
#     fig
# result = np.random.rand(2,10000)

# for i in range(10000):
#     phi = [-169, -60, -116, -121, 51]
#     psi = [-160, -13, -84, 111, 42]
#     omega = (np.random.rand(1,5) - 0.5) * 2 * 180
#     #omega = [170, 170, 170, 170, 170]

#     d1 = np.cos(np.radians(phi))
#     d2 = np.sin(np.radians(phi))
#     d3 = np.cos(np.radians(psi))
#     d4 = np.sin(np.radians(psi))
#     d5 = np.cos(np.radians(omega[0]))
#     d6 = np.sin(np.radians(omega[0]))

#     manual = np.hstack((d1,d2,d3,d4,d5,d6))
#     result[:,i]= (pca_d.transform([manual]))
# ax.scatter(result[0,:], result[1,:])
# fig
# pos = reduced_dihedrals
# src.utils.link_ngl_wdgt_to_ax_pos(ax, pos, v)
# v
# phi, psi, omega = getDihedrals(t[95852])
# print(np.mean(np.degrees(phi), axis=0), np.mean(np.degrees(psi), axis=0), np.mean(np.degrees(omega), axis=0))
# Clustering
# TSNE dimensionality reduction
dihe = src.pca.getDih(t)
tsne = TSNE(n_components=2, verbose=1, perplexity=50, n_iter=2000, random_state=42)
tsne_results = tsne.fit_transform(dihe[::125,:]) # 250
plt.scatter(tsne_results[:,0], tsne_results[:,1])
[t-SNE] Computing 151 nearest neighbors...
[t-SNE] Indexed 8000 samples in 0.001s...
[t-SNE] Computed neighbors for 8000 samples in 0.756s...
[t-SNE] Computed conditional probabilities for sample 1000 / 8000
[t-SNE] Computed conditional probabilities for sample 2000 / 8000
[t-SNE] Computed conditional probabilities for sample 3000 / 8000
[t-SNE] Computed conditional probabilities for sample 4000 / 8000
[t-SNE] Computed conditional probabilities for sample 5000 / 8000
[t-SNE] Computed conditional probabilities for sample 6000 / 8000
[t-SNE] Computed conditional probabilities for sample 7000 / 8000
[t-SNE] Computed conditional probabilities for sample 8000 / 8000
[t-SNE] Mean sigma: 0.387711
[t-SNE] KL divergence after 250 iterations with early exaggeration: 72.106773
[t-SNE] KL divergence after 2000 iterations: 1.522005
<matplotlib.collections.PathCollection at 0x7f4f96062880>
../_images/ed6dd3148ef9b069_GaMD_processed_32_2.png
# Derive epsilon for DBSCAN-clustering from data: epsilon = max distance between nearest neighbors
nbrs = NearestNeighbors(n_neighbors=2).fit(tsne_results) #
distances, indices = nbrs.kneighbors(tsne_results)
epsilon = distances.max()
distances = np.sort(distances, axis=0)
distances = distances[:,1]
plt.plot(distances)
plt.title("NN-distances in tsne plot")
plt.ylabel("NN-distance")
plt.show()
../_images/ed6dd3148ef9b069_GaMD_processed_33_0.png
# Perform DBSCAN-clustering with varying min_samples parameter
num_clusters = []
num_noise = []
for i in range(0,200,1):
    clustering = DBSCAN(eps=epsilon, min_samples=i).fit(tsne_results)
    labels = clustering.labels_
    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
    n_noise = list(labels).count(-1)
    num_clusters.append(n_clusters)
    num_noise.append(n_noise)
    
# Drop all points following the first detection of 0 clusters
num_clusters = np.array(num_clusters)
cutoff = np.argmin(num_clusters>0)
num_clusters=num_clusters[:cutoff]
print(num_clusters)

x = np.arange(0, len(num_clusters))
[24 24 24 24 23 22 23 23 21 22 22 24 24 25 25 25 23 24 30 33 36 38 44 43
 47 48 47 42 41 38 31 27 25 27 24 27 32 25 25 23 19 18 17 17 17 18 18 19
 18 18 18 18 17 16 14 14 14 14 13 13 12 12 11 11 11 11 10  8  7  6  4  3
  3  2  1  1  1  1  1]
# Fit polynomial to detect right-most plateau
if x.size > 0:
    mymodel = np.poly1d(np.polyfit(x, num_clusters, 8))

    deriv = mymodel.deriv()
    roots = deriv.roots

    # discard complex roots
    r_roots = roots[np.isreal(roots)].real

    # discard negative values
    r_roots = r_roots[r_roots >= 0]

    # discard values greater than x.max()
    r_roots = r_roots[r_roots <= x.max() - 3]

    # Take largest root
    if r_roots != []:
        min_samples = int(r_roots.max())
        print(f"min_samples = {min_samples} was selected as parameter for clustering")
    else:
        min_samples = 15
        print('Caution! min samples parameter was selected as fixed value b/c automatic determination failed. specify the parameter manually in the config!')
else:
    min_samples = 15
# If config overrides, use config value:
if snakemake.wildcards.index in snakemake.config["cluster_conf"]:
    min_samples = int(snakemake.config["cluster_conf"][snakemake.wildcards.index])
    print(f"Override: Use min_samples={min_samples} instead of the above determined parameter")

# if str(compound_index) in snakemake.config[f"{snakemake.params.method}_clustering"]:
#     min_samples = int(snakemake.config[f"{snakemake.params.method}_clustering"][f"{compound_index}"])
#     print(f"Override: Use min_samples={min_samples} instead of the above determined parameter")
min_samples = 53 was selected as parameter for clustering
Override: Use min_samples=8 instead of the above determined parameter
/tmp/ipykernel_2207/2420957471.py:18: DeprecationWarning: elementwise comparison failed; this will raise an error in the future.
  if r_roots != []:
# min_samples = 8
# print(f"Override: Use min_samples={min_samples} instead of the above determined parameter")
plt.scatter(x, num_clusters, label='clustering with different min_sample parm.')
plt.plot(x, mymodel(x), label='poly-fit')
plt.vlines(min_samples, 2, num_clusters.max(), color='red', label='selected min_sample paramter')
plt.plot(x, deriv(x), label='derivative of poly-fit')
plt.legend(loc='lower left')
plt.title('Determining min_samples parameter for clustering')
plt.xlabel('min_samples parameter')
plt.ylabel('Number of clusters observed')
plt.savefig(snakemake.output.cluster_min_samp)
../_images/ed6dd3148ef9b069_GaMD_processed_37_0.png
plt.plot(num_noise, label='Number of points classified as noise')
plt.xlabel('min_samples parameter')
plt.ylabel('Number of points classified as noise')
plt.title('Number of points classified as noise')
plt.show()
../_images/ed6dd3148ef9b069_GaMD_processed_38_0.png
# Perform clustering for selected min_samples parameter
clustering = DBSCAN(eps=epsilon, min_samples=min_samples).fit(tsne_results)

threshhold = 0.01 # 0.05

n_clusters = len(set(clustering.labels_)) - (1 if -1 in clustering.labels_ else 0)
print(f"There are {n_clusters} clusters")

cluster_points = []
cluster_label_filter = []

for cluster in set(clustering.labels_):
    if cluster != -1:
        if len(clustering.labels_[clustering.labels_ == cluster]) >= threshhold * len(clustering.labels_):
            clus_points = tsne_results[clustering.labels_ == cluster]
            plt.plot(clus_points[:,0], clus_points[:,1], marker='.', linewidth=0, label=f"Cluster {cluster}")
            percentage = len(clustering.labels_[clustering.labels_ == cluster]) / len(clustering.labels_)
            print(f"Cluster {cluster} makes up more than 5% of points. ({percentage * 100} % of total points)")
            cluster_points.append(clustering.labels_ == cluster)
            cluster_label_filter.append(cluster)
        else:
            clus_points = tsne_results[clustering.labels_ == cluster]
            plt.plot(clus_points[:,0], clus_points[:,1], marker='.', linewidth=0, label=f"Cluster {cluster}", alpha=0.1)
            percentage = len(clustering.labels_[clustering.labels_ == cluster]) / len(clustering.labels_)
            print(f"Exlude Cluster {cluster} is less than 5% of points. ({percentage * 100} % of total points)")
            plt.plot
    else:
        clus_points = tsne_results[clustering.labels_ == cluster]
        plt.plot(clus_points[:,0], clus_points[:,1], marker='.', linewidth=0, label=f"Noise", alpha=0.1, color='grey')
        
        percentage = len(clustering.labels_[clustering.labels_ == cluster]) / len(clustering.labels_)
        print(f"Noise makes up {percentage * 100} % of total points.")
plt.legend()
plt.title('Clusters and associated points. Noise and clusters with < 5% of points are shown in faded')
plt.tight_layout()
plt.savefig(snakemake.output.cluster_plot)
There are 21 clusters
Cluster 0 makes up more than 5% of points. (52.800000000000004 % of total points)
Cluster 1 makes up more than 5% of points. (8.725 % of total points)
Cluster 2 makes up more than 5% of points. (3.4625000000000004 % of total points)
Cluster 3 makes up more than 5% of points. (2.5250000000000004 % of total points)
Exlude Cluster 4 is less than 5% of points. (0.25 % of total points)
Cluster 5 makes up more than 5% of points. (1.275 % of total points)
Exlude Cluster 6 is less than 5% of points. (0.8 % of total points)
Cluster 7 makes up more than 5% of points. (1.4874999999999998 % of total points)
Cluster 8 makes up more than 5% of points. (1.175 % of total points)
Cluster 9 makes up more than 5% of points. (1.275 % of total points)
Exlude Cluster 10 is less than 5% of points. (0.1875 % of total points)
Cluster 11 makes up more than 5% of points. (4.4125 % of total points)
Cluster 12 makes up more than 5% of points. (8.3 % of total points)
Cluster 13 makes up more than 5% of points. (1.8875 % of total points)
Cluster 14 makes up more than 5% of points. (1.5375 % of total points)
Cluster 15 makes up more than 5% of points. (1.5 % of total points)
Cluster 16 makes up more than 5% of points. (3.0 % of total points)
Cluster 17 makes up more than 5% of points. (1.875 % of total points)
Exlude Cluster 18 is less than 5% of points. (0.2875 % of total points)
Cluster 19 makes up more than 5% of points. (1.4625000000000001 % of total points)
Cluster 20 makes up more than 5% of points. (1.4000000000000001 % of total points)
Noise makes up 0.375 % of total points.
../_images/ed6dd3148ef9b069_GaMD_processed_39_1.png
plt.plot(clustering.labels_, marker=1, linewidth=0 )
plt.title('Clusters over time (-1 is noise)')
plt.xlabel('Snapshot')
plt.ylabel('Cluster')
plt.savefig(snakemake.output.cluster_time)
../_images/ed6dd3148ef9b069_GaMD_processed_40_0.png
# Find cluster points in original trajectory, compute average structure, 
# then find closest (min-rmsd) cluster structure to this
reduced_ind = np.arange(0,len(dihe), 125)
reduced_g_dihe = dihe[reduced_ind,:]
cluster_min_pca = []
cluster_index = []

t0 = t[0].time
dt = t.timestep

for i, cluster_name in zip(cluster_points, cluster_label_filter):
    
    # cluster points in original trajectory
    indices = reduced_ind[i]
    avg_struct = np.mean(t[indices].xyz, axis=0)
    avg_t = md.Trajectory(xyz=avg_struct, topology=None)
    
    # compute average dihedral angles for each cluster:
    phi, psi, omega = getDihedrals(t[indices])
    print(np.degrees(src.analyse.angle_mean(phi)), np.degrees(src.analyse.angle_mean(psi)), np.degrees(src.analyse.angle_mean(omega)))
    
    # find min-RMSD structure to the average
    rmsd = md.rmsd(t[indices], avg_t, 0)
    min_rmsd_idx = np.where(rmsd == rmsd.min())
    cluster_min = t[indices][min_rmsd_idx]
    cluster_index.append(int((cluster_min.time - t0) / dt))
    print(f"Cluster {cluster_name}: Closest min structure is frame {int((cluster_min.time - t0) / dt)} (time: {float(cluster_min.time)})")
    # Compute dihedrals of min-RMSD cluster structure, and transform to PCA
    cluster_min = getDih(cluster_min)
    cluster_min_pca.append(pca_d.transform(cluster_min))
    
[-150.68396  -76.53684  -84.16261  -92.07799 -125.64173] [107.61007  -34.473133 -52.527847 -47.173622 -46.883198] [ -11.641534 -159.83403   175.04112   167.56938   175.81676 ]
Cluster 0: Closest min structure is frame 591875 (time: 1235752.0)
[ -57.416344  -81.69342   -79.233055 -145.4573    -71.24101 ] [129.70282  -27.42081  -51.032864 -56.857693 138.82    ] [   6.296922 -161.95824   175.02863   133.43633  -162.6689  ]
Cluster 1: Closest min structure is frame 321125 (time: 694252.0)
[-54.272854 -83.84947  -93.230804 -69.153465 -89.008575] [ 127.75092    -8.619949 -129.79474   -44.231823  129.79698 ] [  16.190983  179.88754   173.31999   144.2717   -164.2476  ]
Cluster 2: Closest min structure is frame 136625 (time: 325252.0)
[  58.20588   -84.44945   -86.817154 -100.78626  -129.79498 ] [ 92.71926  -37.163666 -56.573368 -46.6781    85.69204 ] [   0.8313287 -159.926      172.92993    165.07803   -177.02519  ]
Cluster 3: Closest min structure is frame 137625 (time: 327252.0)
[-131.38959  -64.05873 -136.79083 -107.68947  -89.71494] [-52.24896  -43.660824 -55.252808 -46.632343 -58.47447 ] [ 172.34698 -179.96251  177.06523  171.86159  168.43626]
Cluster 5: Closest min structure is frame 745625 (time: 1543252.0)
[-129.33394   -67.949776 -146.26587   -82.74587    60.75602 ] [-49.718582 -50.38793  -57.208115 141.06223  -72.79105 ] [ 175.68727  172.64223  151.32109 -174.80083  178.08464]
Cluster 7: Closest min structure is frame 60375 (time: 172752.0)
[-142.10268   -62.291603  -58.089573 -100.876945   64.972206] [ -36.141335 -157.67873   -33.670944  134.44547   -59.398396] [139.9653  175.27531 155.74402 176.54001 178.93687]
Cluster 8: Closest min structure is frame 59875 (time: 171752.0)
[-148.94844   -67.5717   -102.87831   -92.2456     58.628662] [130.62375  -39.216007 -50.24834   96.73601  -62.718735] [ -14.113834 -175.60463   169.41922  -156.81819   172.65604 ]
Cluster 9: Closest min structure is frame 70500 (time: 193002.0)
[ -67.07059   -55.96993   -81.766685 -145.5943    -98.07615 ] [-174.20569   -15.500388  -47.290478  -68.75567   -32.70875 ] [ 166.1054   168.37828 -179.30328  166.70996  148.5038 ]
Cluster 11: Closest min structure is frame 873375 (time: 1798752.0)
[-153.82472   -68.82306    60.708134  -71.45753  -135.96028 ] [108.509895 169.20247  -77.4811   -41.596233 -61.930954] [ -14.474    166.51572 -179.69412  162.30026  171.3672 ]
Cluster 12: Closest min structure is frame 520000 (time: 1092002.0)
[  52.149956  -72.354004   54.937588  -73.247986 -110.50608 ] [  85.53945   158.06888  -107.656845  -37.220703   20.967108] [  11.260858  178.66539   179.51627   175.97485  -167.87276 ]
Cluster 13: Closest min structure is frame 919250 (time: 1890502.0)
[-136.15942   -68.162155 -108.338524 -144.10384   -76.98615 ] [128.97685  -29.236767 -46.89699   90.964386 -29.469967] [  -5.364112 -176.716    -175.0981    -12.369749  178.76302 ]
Cluster 14: Closest min structure is frame 371875 (time: 795752.0)
[-140.19588   -64.287506 -118.937126   43.31931   -90.474556] [129.95334  -31.7925   135.38692   71.64727  -35.823715] [ -10.364739  165.11989   176.70978    -0.619187 -175.19843 ]
Cluster 15: Closest min structure is frame 388000 (time: 828002.0)
[-154.784     -66.02331   -78.95472    60.800755 -115.44232 ] [117.87805  -39.624435 140.5959   -64.42076  -51.200523] [ -25.754246  172.46326  -172.1372    170.21687   179.40154 ]
Cluster 16: Closest min structure is frame 409750 (time: 871502.0)
[  54.522327  -86.431755  -81.550514   60.933105 -136.37476 ] [ 72.53072  -40.069183 116.831856 -53.88554  120.234795] [  -9.0440855 -169.31694   -161.83904    162.38048    166.72272  ]
Cluster 17: Closest min structure is frame 454000 (time: 960002.0)
[-148.51622   -66.24922   -81.39045    50.76755    62.669968] [ 98.3133   -39.72472  143.43771   61.9014   -20.940727] [ -30.80887  160.84879 -162.43863 -169.00635  178.45291]
Cluster 19: Closest min structure is frame 447375 (time: 946752.0)
[-149.97968   -63.238327   60.98254  -141.47786   -79.91288 ] [118.903915 152.89912  -64.4008    94.152145 -39.508778] [ -13.179446  177.16798  -177.21915   -10.035109  177.6571  ]
Cluster 20: Closest min structure is frame 798125 (time: 1648252.0)
# Plot cluster mins in d-PCA plot
fig, ax = plt.subplots()
ax.scatter(reduced_dihedrals[:,0], reduced_dihedrals[:,1], marker='.', s=0.5, alpha=1, c='black')
for i,j in zip(cluster_min_pca, cluster_label_filter):
    ax.plot(i[:,0],i[:,1], marker='^', label=f"Cluster {j}", linewidth=0)
ax.legend()
ax.set_title('Dihedral PCA, and associated cluster minima')
fig.savefig(snakemake.output.cluster_pca)
/tmp/ipykernel_2207/210811007.py:8: UserWarning: Creating legend with loc="best" can be slow with large amounts of data.
  fig.savefig(snakemake.output.cluster_pca)
/biggin/b147/univ4859/research/03_macroconf/.snakemake/conda/bc755c0e13227b06a9186f87b9557a8f/lib/python3.9/site-packages/IPython/core/pylabtools.py:134: UserWarning: Creating legend with loc="best" can be slow with large amounts of data.
  fig.canvas.print_figure(bytes_io, **kw)
../_images/ed6dd3148ef9b069_GaMD_processed_42_1.png
cluster_traj = t[cluster_index]
cluster_traj.superpose(cluster_traj, 0, atom_indices = cluster_traj.top.select('backbone'))
view = nv.show_mdtraj(cluster_traj)
view
# compute rmsd between clusters
from itertools import combinations
indices = list(combinations(range(cluster_traj.n_frames), 2))

rmsd = np.zeros((cluster_traj.n_frames, cluster_traj.n_frames))
for i, j in indices:
    rmsd[i,j] = md.rmsd(cluster_traj[i], cluster_traj[j], atom_indices = cluster_traj.top.select('backbone')) * 10
rmsd
#md.rmsd(cluster_traj) * 10
array([[0.        , 1.21006596, 1.35707402, 1.0373776 , 1.44431412,
        1.54712129, 1.34791017, 1.04482269, 1.35082018, 0.82759708,
        1.38936567, 1.25908601, 1.2916714 , 0.87101829, 1.26765239,
        1.1636281 , 1.40254998],
       [0.        , 0.        , 0.67931229, 0.59174269, 1.45832658,
        1.77097929, 1.76755118, 1.5611769 , 1.3189429 , 1.4620012 ,
        1.43339658, 1.64959121, 1.7004025 , 1.28998327, 0.85875475,
        1.70095384, 1.73832715],
       [0.        , 0.        , 0.        , 0.90512979, 1.37683403,
        1.59793353, 1.68815875, 1.72055268, 1.43167198, 1.46743178,
        1.17825305, 1.69617105, 1.80028009, 1.45067322, 1.05821323,
        1.82819176, 1.73907459],
       [0.        , 0.        , 0.        , 0.        , 1.43872428,
        1.71662879, 1.603526  , 1.55109632, 1.38878345, 1.32922769,
        1.2569344 , 1.71077895, 1.7548573 , 1.25885677, 0.79568124,
        1.61804271, 1.70379055],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        1.05192995, 1.39211667, 1.81994247, 0.97972333, 1.56630349,
        1.38572633, 1.7662729 , 1.80182672, 1.75874841, 1.74496675,
        1.92842352, 1.96065474],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.74436957, 1.50903916, 1.48065972, 1.49220967,
        1.47030067, 1.45437133, 1.7208488 , 1.90232539, 1.93203521,
        1.7682848 , 1.51959014],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 1.27410817, 1.67125833, 1.33240175,
        1.36392999, 1.43588734, 1.66482854, 1.70395386, 1.82030821,
        1.52195752, 1.35088861],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 1.73062229, 1.20845664,
        1.79418612, 0.66912353, 1.05809045, 1.37804818, 1.73943448,
        1.00936663, 1.03443849],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 1.72127795,
        1.72794032, 1.81248128, 1.72501624, 1.56343436, 1.59692276,
        1.69002604, 2.08202863],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        1.09584069, 1.28322649, 1.49526119, 1.22290576, 1.53093767,
        1.43845367, 0.9388082 ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 1.81133199, 1.89117098, 1.57543731, 1.39607024,
        1.80133045, 1.4769398 ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.95130861, 1.57540071, 1.92377818,
        1.33717489, 0.92320538],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 1.12060392, 1.65300512,
        0.88305426, 1.42359567],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.94588405,
        0.93331224, 1.69600153],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        1.43222046, 1.91773391],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 1.54104769],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        ]])
# compute dihedral angles
omega = getOmega(cluster_traj)
omega_deg = np.abs(np.degrees(omega))
plt.plot(omega_deg)
plt.title(f"Omega angles over time. Compound {compound_index}")
plt.show()
../_images/ed6dd3148ef9b069_GaMD_processed_46_0.png
# view = nv.show_mdtraj(cluster_traj)
# view
# import tempfile
# tmp_dir = tempfile.mkdtemp()
# pdb_temp = tempfile.mktemp(suffix=".pdb", dir=tmp_dir)
# png_temp = tempfile.mktemp(suffix=".pdb", dir=tmp_dir)
cluster_traj.save_pdb(snakemake.output.cluster_pdb)
data=[]
pymol_script = f"""load {snakemake.output.cluster_pdb}
# inspired by: https://gist.github.com/bobbypaton/1cdc4784f3fc8374467bae5eb410edef
cmd.bg_color("white")
cmd.set("ray_opaque_background", "off")
cmd.set("orthoscopic", 0)
cmd.set("transparency", 0.1)
cmd.set("dash_gap", 0)
cmd.set("ray_trace_mode", 1)
cmd.set("ray_texture", 2)
cmd.set("antialias", 3)
cmd.set("ambient", 0.5)
cmd.set("spec_count", 5)
cmd.set("shininess", 50)
cmd.set("specular", 1)
cmd.set("reflect", .1)
cmd.space("cmyk")

#cmd.cartoon("oval")
cmd.show("sticks")
cmd.show("spheres")
cmd.color("gray85","elem C")
cmd.color("gray98","elem H")
cmd.color("slate","elem N")
cmd.color("red","elem O")
cmd.set("stick_radius",0.07)
cmd.set("sphere_scale",0.18)
cmd.set("sphere_scale",0.13, "elem H")
cmd.set("dash_gap",0.01)
cmd.set("dash_radius",0.07)
cmd.set("stick_color","black")
cmd.set("dash_gap",0.01)
cmd.set("dash_radius",0.035)
cmd.hide("nonbonded")
cmd.hide("cartoon")
cmd.hide("lines")
cmd.orient()
cmd.zoom()
cmd.hide("labels")

cmd.mpng("{snakemake.params.cluster_dir}test_", width=1000, height=1000)

"""
pymol_script_file = f"{snakemake.params.cluster_dir}pym.pml"
with open(pymol_script_file, "w") as f:
    f.write(pymol_script)
!pymol -c $pymol_script_file
 PyMOL(TM) Molecular Graphics System, Version 2.5.0.
 Copyright (c) Schrodinger, LLC.
 All Rights Reserved.
 
    Created by Warren L. DeLano, Ph.D. 
 
    PyMOL is user-supported open-source software.  Although some versions
    are freely available, PyMOL is not in the public domain.
 
    If PyMOL is helpful in your work or study, then please volunteer 
    support for our ongoing efforts to create open and affordable scientific
    software by purchasing a PyMOL Maintenance and/or Support subscription.

    More information can be found at "http://www.pymol.org".
 
    Enter "help" for a list of commands.
    Enter "help <command-name>" for information on a specific command.

 Hit ESC anytime to toggle between text and graphics.

 Detected 24 CPU cores.  Enabled multithreaded rendering.
PyMOL>load data/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_clusters/clusters.pdb
 ObjectMolecule: Read crystal symmetry information.
 ObjectMoleculeReadPDBStr: read MODEL 1
 ObjectMoleculeReadPDBStr: read MODEL 2
 ObjectMoleculeReadPDBStr: read MODEL 3
 ObjectMoleculeReadPDBStr: read MODEL 4
 ObjectMoleculeReadPDBStr: read MODEL 5
 ObjectMoleculeReadPDBStr: read MODEL 6
 ObjectMoleculeReadPDBStr: read MODEL 7
 ObjectMoleculeReadPDBStr: read MODEL 8
 ObjectMoleculeReadPDBStr: read MODEL 9
 ObjectMoleculeReadPDBStr: read MODEL 10
 ObjectMoleculeReadPDBStr: read MODEL 11
 ObjectMoleculeReadPDBStr: read MODEL 12
 ObjectMoleculeReadPDBStr: read MODEL 13
 ObjectMoleculeReadPDBStr: read MODEL 14
 ObjectMoleculeReadPDBStr: read MODEL 15
 ObjectMoleculeReadPDBStr: read MODEL 16
 ObjectMoleculeReadPDBStr: read MODEL 17
 CmdLoad: "" loaded as "clusters".
PyMOL>cmd.bg_color("white")
PyMOL>cmd.set("ray_opaque_background", "off")
PyMOL>cmd.set("orthoscopic", 0)
PyMOL>cmd.set("transparency", 0.1)
PyMOL>cmd.set("dash_gap", 0)
PyMOL>cmd.set("ray_trace_mode", 1)
PyMOL>cmd.set("ray_texture", 2)
PyMOL>cmd.set("antialias", 3)
PyMOL>cmd.set("ambient", 0.5)
PyMOL>cmd.set("spec_count", 5)
PyMOL>cmd.set("shininess", 50)
PyMOL>cmd.set("specular", 1)
PyMOL>cmd.set("reflect", .1)
PyMOL>cmd.space("cmyk")
 Color: loaded table '/biggin/b147/univ4859/research/03_macroconf/.snakemake/conda/bc755c0e13227b06a9186f87b9557a8f/lib/python3.9/site-packages/pymol/pymol_path/data/pymol/cmyk.png'.
PyMOL>cmd.show("sticks")
PyMOL>cmd.show("spheres")
PyMOL>cmd.color("gray85","elem C")
PyMOL>cmd.color("gray98","elem H")
PyMOL>cmd.color("slate","elem N")
PyMOL>cmd.color("red","elem O")
PyMOL>cmd.set("stick_radius",0.07)
PyMOL>cmd.set("sphere_scale",0.18)
PyMOL>cmd.set("sphere_scale",0.13, "elem H")
PyMOL>cmd.set("dash_gap",0.01)
PyMOL>cmd.set("dash_radius",0.07)
PyMOL>cmd.set("stick_color","black")
PyMOL>cmd.set("dash_gap",0.01)
PyMOL>cmd.set("dash_radius",0.035)
PyMOL>cmd.hide("nonbonded")
PyMOL>cmd.hide("cartoon")
PyMOL>cmd.hide("lines")
PyMOL>cmd.orient()
PyMOL>cmd.zoom()
PyMOL>cmd.hide("labels")
PyMOL>cmd.mpng("data/processed/refactor-test/results/22/H2O/GaMD/2000/0/ed6dd3148ef9b069_clusters/test_", width=1000, height=1000)
 Movie: frame    1 of   17, 0.97 sec. (0:00:16 - 0:00:16 to go).
 Movie: frame    2 of   17, 1.01 sec. (0:00:16 - 0:00:15 to go).
 Movie: frame    3 of   17, 0.97 sec. (0:00:14 - 0:00:14 to go).
 Movie: frame    4 of   17, 0.97 sec. (0:00:13 - 0:00:13 to go).
 Movie: frame    5 of   17, 0.97 sec. (0:00:12 - 0:00:12 to go).
 Movie: frame    6 of   17, 0.98 sec. (0:00:11 - 0:00:11 to go).
 Movie: frame    7 of   17, 0.98 sec. (0:00:10 - 0:00:10 to go).
 Movie: frame    8 of   17, 0.99 sec. (0:00:09 - 0:00:09 to go).
 Movie: frame    9 of   17, 0.99 sec. (0:00:08 - 0:00:08 to go).
 Movie: frame   10 of   17, 0.96 sec. (0:00:07 - 0:00:07 to go).
 Movie: frame   11 of   17, 0.98 sec. (0:00:06 - 0:00:06 to go).
 Movie: frame   12 of   17, 0.96 sec. (0:00:05 - 0:00:05 to go).
 Movie: frame   13 of   17, 1.01 sec. (0:00:05 - 0:00:04 to go).
 Movie: frame   14 of   17, 0.94 sec. (0:00:03 - 0:00:03 to go).
 Movie: frame   15 of   17, 0.98 sec. (0:00:02 - 0:00:02 to go).
 Movie: frame   16 of   17, 0.96 sec. (0:00:01 - 0:00:01 to go).
 Movie: frame   17 of   17, 0.97 sec. (0:00:00 - 0:00:00 to go).
import matplotlib.image as mpimg

data = []
cluster_imgs = [f"{snakemake.params.cluster_dir}test_{str(i+1).zfill(4)}.png" for i in range(cluster_traj.n_frames)]

[data.append(mpimg.imread(img)) for img in cluster_imgs]
print("images read")
images read
import matplotlib.image as mpimg
import io
#get default colors
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
# make colors longer if more clusters than colors...
while len(cluster_label_filter) > len(colors):
    colors.extend(colors)
    print('Colors appended..')

fig, axs = plt.subplots(len(cluster_label_filter),3, sharex='col', squeeze=False)
fig.set_size_inches(12, 3*len(cluster_label_filter))
# plot cluster images
for i in range(cluster_traj.n_frames):
    #print(f"final {i}")
    axs[i,0].imshow(data[i])
    axs[i,0].tick_params(axis='both', which='both', bottom=False, top=False, left=False, labelleft=False, labelbottom=False)
    #axs[i,0].tick_params(axis='y', which='both', bottom=False, top=False, labelbottom=False)
# plot corresponding pca's:
for i in range(cluster_traj.n_frames):
    axs[i,1].scatter(reduced_dihedrals[:,0], reduced_dihedrals[:,1], marker='.', s=0.5, alpha=1, c='black')
# add cluster representations
for ii,iii,iiii in zip(cluster_min_pca, cluster_label_filter, range(len(cluster_label_filter))):
    clus, = axs[iiii,1].plot(ii[:,0],ii[:,1], marker='^', label=f"Cluster {iii}", linewidth=0, c=colors[iiii])
    #clus.get_color()
    

    
NOE_path = snakemake.input.noe

# add noe plots
for i, j, k in zip(range(cluster_traj.n_frames), cluster_index, cluster_label_filter):
    NOE = src.noe.read_NOE(NOE_path)
    if multiple:
        NOE_trans, NOE_cis = NOE
        NOE_cis_dict = NOE_cis.to_dict(orient='index')
        NOE_trans_dict = NOE_trans.to_dict(orient='index')
    else:
        NOE_dict = NOE.to_dict(orient='index')

    current_cluster = cluster_traj[i]
    #print(j)
    if multiple:
        if j in cis:
            #print("cis")
            NOE_dict = NOE_cis_dict
            NOE = NOE_cis
            axs[i,2].set_title(f"Cluster {k} (cis)")
        else:
            #print("trans!")
            NOE_dict = NOE_trans_dict
            NOE = NOE_trans
            axs[i,2].set_title(f"Cluster {k} (trans)")
    else:
        axs[i,2].set_title(f"Cluster {k}")
    NOE['md'],_,_2,NOE_dist, _3 = compute_NOE_mdtraj(NOE_dict, current_cluster)
    # Deal with ambigous NOEs
    NOE = NOE.explode('md')
    # and ambigous/multiple values
    NOE = NOE.explode('NMR exp')
    fig, axs[i,2] = plot_NOE(NOE, fig, axs[i,2])
fig.tight_layout()   
fig.savefig(snakemake.output.cluster_structs)
Colors appended..
../_images/ed6dd3148ef9b069_GaMD_processed_54_1.png

1.4.2. Compute NOEs¶

NOE_path = snakemake.input.noe
NOE = src.noe.read_NOE(NOE_path)
snakemake.input.noe
NOE_output = {}
if multiple:
    fig, axs = plt.subplots(2,1)
    fig.set_size_inches(10,6)
    NOE_trans, NOE_cis = NOE
    NOE_cis_dict = NOE_cis.to_dict(orient='index')
    NOE_trans_dict = NOE_trans.to_dict(orient='index')
    if len(cis) > 0:
        NOE_cis['md'],_,_2,NOE_dist_cis,_3 = compute_NOE_mdtraj(NOE_cis_dict, t[cis])
        
        NOE_output[f"{multi['cis']}"] = NOE_cis.to_dict(orient='index')
        # Deal with ambigous NOEs
        NOE_cis = NOE_cis.explode('md')
        # and ambigous/multiple values
        NOE_cis = NOE_cis.explode('NMR exp')
        fig, axs[1] = plot_NOE(NOE_cis, fig, axs[1])
        axs[1].set_title(f"Compound {multi['cis']} (cis)")
    else:
        print("Cis skipped because no frames are cis.")
    if len(trans) > 0:
        NOE_trans['md'],_,_2,NOE_dist_trans, _3 = compute_NOE_mdtraj(NOE_trans_dict, t[trans])
        
        NOE_output[f"{multi['trans']}"] = NOE_trans.to_dict(orient='index')
        # Deal with ambigous NOEs
        NOE_trans = NOE_trans.explode('md')
        # and ambigous/multiple values
        NOE_trans = NOE_trans.explode('NMR exp')
        
        fig, axs[0] = plot_NOE(NOE_trans, fig, axs[0])
        axs[0].set_title(f"Compound {multi['trans']} (trans)")
    else:
        print("Trans skipped because no frames are cis")
else:
    NOE_dict = NOE.to_dict(orient='index')
    NOE['md'],_,_2, NOE_dist, _3 = compute_NOE_mdtraj(NOE_dict, t)
    
    # Save NOE dict
    NOE_output = {f"{compound_index}": NOE.to_dict(orient='index')}
    # Deal with ambigous NOEs
    NOE = NOE.explode('md')
    # and ambigous/multiple values
    NOE = NOE.explode('NMR exp')
    fig, ax = plot_NOE(NOE)
    ax.set_title(f"Compound {compound_index}. NOE")
fig.tight_layout()
fig.savefig(snakemake.output.noe_plot)
# save as .json file
src.utils.json_dump(snakemake.output.noe_result, NOE_output)
../_images/ed6dd3148ef9b069_GaMD_processed_58_0.png
# # Reweighted NOEs

# if snakemake.params.method != "cMD":
#     if multiple:
#         fig, axs = plt.subplots(2,1)
#         fig.set_size_inches(10,6)
#         NOE_trans, NOE_cis = NOE
#         NOE_cis_dict = NOE_cis.to_dict(orient='index')
#         NOE_trans_dict = NOE_trans.to_dict(orient='index')
#         if len(cis) > 0:
#             NOE_cis['md'],_,_2,NOE_dist_cis, pmf_plt_cis = compute_NOE_mdtraj(NOE_cis_dict, t[cis], snakemake.input.weights, 0, cis)
#             # Deal with ambigous NOEs
#             NOE_cis = NOE_cis.explode('md')
#             # and ambigous/multiple values
#             NOE_cis = NOE_cis.explode('NMR exp')
#             fig, axs[1] = plot_NOE(NOE_cis, fig, axs[1])
#             axs[1].set_title(f"Compound {multi['cis']} (cis)")
#         else:
#             print("Cis skipped because no frames are cis.")
#         if len(trans) > 0:
#             NOE_trans['md'],_,_2,NOE_dist_trans, pmf_plot_trans = compute_NOE_mdtraj(NOE_trans_dict, t[trans], snakemake.input.weights, 0, trans)
#              # Deal with ambigous NOEs
#             NOE_trans = NOE_trans.explode('md')
#             # and ambigous/multiple values
#             NOE_trans = NOE_trans.explode('NMR exp')
#             fig, axs[0] = plot_NOE(NOE_trans, fig, axs[0])
#             axs[0].set_title(f"Compound {multi['trans']} (trans)")
#         else:
#             print("Trans skipped because no frames are cis")
#     else:
#         NOE = src.noe.read_NOE(NOE_path)
#         NOE_dict = NOE.to_dict(orient='index')
#         NOE['md'], *_ = src.noe.compute_NOE_mdtraj(NOE_dict, t, snakemake.input.weights)
#         # Deal with ambigous NOEs
#         NOE = NOE.explode('md')
#         # and ambigous/multiple values
#         NOE = NOE.explode('NMR exp')
#         fig, ax = plot_NOE(NOE)
#         ax.set_title(f"Compound {compound_index}. NOE")
#         fig.tight_layout()
#         fig.tight_layout()
#         fig.savefig(snakemake.output.noe_plot)
# else:
#     print("cMD - no reweighted NOEs performed.")
# 1d PMF reweighted NOEs

NOE_output = {}

if snakemake.params.method != "cMD":
    if multiple:
        fig, axs = plt.subplots(2,1)
        fig.set_size_inches(10,6)
        NOE_trans, NOE_cis = NOE
        NOE_cis_dict = NOE_cis.to_dict(orient='index')
        NOE_trans_dict = NOE_trans.to_dict(orient='index')
        if len(cis) > 0:
            NOE_cis['md'],NOE_cis['lower'], NOE_cis['upper'],NOE_dist_cis, pmf_plot_cis = compute_NOE_mdtraj(NOE_cis_dict, t[cis], snakemake.input.weights, 1, cis)
            
            NOE_output[f"{multi['cis']}"] = NOE_cis.to_dict(orient='index')
            
            # Deal with ambigous NOEs
            NOE_cis = NOE_cis.explode(['md', 'lower', 'upper'])
            # and ambigous/multiple values
            NOE_cis = NOE_cis.explode('NMR exp')
            fig, axs[1] = plot_NOE(NOE_cis, fig, axs[1])
            axs[1].set_title(f"Compound {multi['cis']} (cis)")
        else:
            print("Cis skipped because no frames are cis.")
        if len(trans) > 0:
            NOE_trans['md'],NOE_trans['lower'], NOE_trans['upper'],NOE_dist_trans, pmf_plot_trans = compute_NOE_mdtraj(NOE_trans_dict, t[trans], snakemake.input.weights, 1, trans)
            
            NOE_output[f"{multi['trans']}"] = NOE_trans.to_dict(orient='index')
            # Deal with ambigous NOEs
            NOE_trans = NOE_trans.explode(['md', 'lower', 'upper'])
            # and ambigous/multiple values
            NOE_trans = NOE_trans.explode('NMR exp')
            fig, axs[0] = plot_NOE(NOE_trans, fig, axs[0])
            axs[0].set_title(f"Compound {multi['trans']} (trans)")
        else:
            print("Trans skipped because no frames are cis")
        #NOE_output = {f"{multi[cis]}": NOE_cis, f"{multi[trans]}": NOE_trans}
        src.utils.json_dump(snakemake.output.noe_result, NOE_output)
        fig.savefig(snakemake.output.noe_plot)
    else:
        NOE = src.noe.read_NOE(NOE_path)
        NOE_dict = NOE.to_dict(orient='index')
        NOE['md'], NOE['lower'], NOE['upper'], _, pmf_plot = src.noe.compute_NOE_mdtraj(NOE_dict, t, snakemake.input.weights, 1, weight_data=weight_data)
        plt.close()
        # Save NOE dict
        NOE_output = {f"{compound_index}": NOE.to_dict(orient='index')}
        # save as .json file
        src.utils.json_dump(snakemake.output.noe_result, NOE_output)
        
        # Deal with ambigous NOEs
        #NOE = NOE.explode('md')
        NOE = NOE.explode(['md', 'lower', 'upper'])
        # and ambigous/multiple values
        NOE = NOE.explode('NMR exp')
        fig, ax = plot_NOE(NOE)
        ax.set_title(f"Compound {compound_index}. NOE")
        fig.tight_layout()
        fig.savefig(snakemake.output.noe_plot)
else:
    print("cMD - no reweighted NOEs performed.")
../_images/ed6dd3148ef9b069_GaMD_processed_60_0.png
NOE
Atom 1 Atom 2 NMR exp lower bound upper bound md lower upper
0 (3,) (26,) 3.19 0.0 3.51 3.269909 2.836031 4.368379
1 (3,) (13, 14) 1.83 0.0 3.16 4.365435 3.530876 4.657684
1 (3,) (13, 14) 1.83 0.0 3.16 3.754032 2.750813 4.398247
2 (22,) (26,) 2.93 0.0 3.22 2.349598 2.167836 3.501157
3 (19, 20) (26,) 2.73 0.0 4.27 2.652611 2.334386 3.49688
3 (19, 20) (26,) 2.73 0.0 4.27 3.119261 2.709423 4.254798
4 (16, 17) (26,) 2.71 0.0 4.24 3.438241 2.678757 4.430973
4 (16, 17) (26,) 2.71 0.0 4.24 4.692144 4.167753 5.06137
5 (13, 14) (26,) 2.63 0.0 4.14 3.72845 2.646147 5.257526
5 (13, 14) (26,) 2.63 0.0 4.14 4.996276 3.952186 5.619073
6 (47,) (59,) 2.57 0.0 2.83 3.307918 2.392653 3.577991
7 (49,) (59,) 3.39 0.0 3.73 2.742391 2.347283 3.951439
8 (61,) (1,) 2.17 0.0 2.39 3.10258 2.284587 3.56489
9 (1,) (3,) 2.60 0.0 2.86 2.549587 2.262747 2.912062
10 (1,) (5, 6) 2.37 0.0 3.83 3.136118 2.631868 3.585502
10 (1,) (5, 6) 2.37 0.0 3.83 2.997909 2.637545 3.651101
11 (16, 17) (13, 14) 1.73 0.0 4.19 2.271353 2.170389 2.406081
11 (16, 17) (13, 14) 1.73 0.0 4.19 2.772372 2.656112 2.95778
11 (16, 17) (13, 14) 1.73 0.0 4.19 2.754857 2.660409 2.945727
11 (16, 17) (13, 14) 1.73 0.0 4.19 2.26829 2.168391 2.401991
12 (26,) (28,) 2.31 0.0 2.54 2.175859 2.089499 2.861964
13 (28,) (35, 36, 37, 39, 40, 41) 3.23 0.0 6.99 2.887566 2.379667 4.357423
13 (28,) (35, 36, 37, 39, 40, 41) 3.23 0.0 6.99 3.189468 2.48276 4.4243
13 (28,) (35, 36, 37, 39, 40, 41) 3.23 0.0 6.99 3.13261 2.46419 4.411258
13 (28,) (35, 36, 37, 39, 40, 41) 3.23 0.0 6.99 2.745668 2.323668 4.360998
13 (28,) (35, 36, 37, 39, 40, 41) 3.23 0.0 6.99 5.407195 3.710022 5.537008
13 (28,) (35, 36, 37, 39, 40, 41) 3.23 0.0 6.99 2.763082 2.329042 4.366113
14 (45,) (50,) 2.75 0.0 3.02 2.686155 2.405164 3.455608
15 (45,) (49,) 3.02 0.0 3.32 2.367686 2.206339 3.104622
16 (47,) (50,) 2.26 0.0 2.49 2.531027 2.363641 2.998021
17 (47,) (49,) 2.11 0.0 2.32 2.641353 2.412435 2.978214
18 (50,) (54, 55) 2.60 0.0 6.06 2.681431 2.28373 3.671219
18 (50,) (54, 55) 2.60 0.0 6.06 2.687276 2.285331 3.65452
19 (49,) (54, 55) 3.31 0.0 7.11 2.756263 2.3283 3.67308
19 (49,) (54, 55) 3.31 0.0 7.11 2.800008 2.340225 3.663654
20 (59,) (63, 64) 2.49 0.0 3.98 3.148151 2.679068 3.707264
20 (59,) (63, 64) 2.49 0.0 3.98 3.114627 2.624638 3.578109
21 (61,) (63, 64) 2.08 0.0 3.47 2.51486 2.348111 2.788316
21 (61,) (63, 64) 2.08 0.0 3.47 2.346134 2.232595 2.913819
#NOE_dict
pickle_dump(snakemake.output.noe_dist, NOE_dist)
# if not multiple:
#     ax.violinplot(NOE_dist, positions=np.arange(len(NOE_dist)), widths=0.8, showmeans=True)
#     fig.tight_layout()
#     fig.savefig(snakemake.output.noe_plot)
# else:
#     if len(cis) > 0:
#         axs[1].violinplot(NOE_dist_cis, positions=np.arange(len(NOE_dist_cis)), widths=0.8, showmeans=True)
#     if len(trans) > 0:
#         axs[0].violinplot(NOE_dist_trans, positions=np.arange(len(NOE_dist_trans)), widths=0.8, showmeans=True)
#     fig.tight_layout()
#     fig.savefig(snakemake.output.noe_plot)
fig
../_images/ed6dd3148ef9b069_GaMD_processed_65_0.png
NOE
Atom 1 Atom 2 NMR exp lower bound upper bound md lower upper
0 (3,) (26,) 3.19 0.0 3.51 3.269909 2.836031 4.368379
1 (3,) (13, 14) 1.83 0.0 3.16 4.365435 3.530876 4.657684
1 (3,) (13, 14) 1.83 0.0 3.16 3.754032 2.750813 4.398247
2 (22,) (26,) 2.93 0.0 3.22 2.349598 2.167836 3.501157
3 (19, 20) (26,) 2.73 0.0 4.27 2.652611 2.334386 3.49688
3 (19, 20) (26,) 2.73 0.0 4.27 3.119261 2.709423 4.254798
4 (16, 17) (26,) 2.71 0.0 4.24 3.438241 2.678757 4.430973
4 (16, 17) (26,) 2.71 0.0 4.24 4.692144 4.167753 5.06137
5 (13, 14) (26,) 2.63 0.0 4.14 3.72845 2.646147 5.257526
5 (13, 14) (26,) 2.63 0.0 4.14 4.996276 3.952186 5.619073
6 (47,) (59,) 2.57 0.0 2.83 3.307918 2.392653 3.577991
7 (49,) (59,) 3.39 0.0 3.73 2.742391 2.347283 3.951439
8 (61,) (1,) 2.17 0.0 2.39 3.10258 2.284587 3.56489
9 (1,) (3,) 2.60 0.0 2.86 2.549587 2.262747 2.912062
10 (1,) (5, 6) 2.37 0.0 3.83 3.136118 2.631868 3.585502
10 (1,) (5, 6) 2.37 0.0 3.83 2.997909 2.637545 3.651101
11 (16, 17) (13, 14) 1.73 0.0 4.19 2.271353 2.170389 2.406081
11 (16, 17) (13, 14) 1.73 0.0 4.19 2.772372 2.656112 2.95778
11 (16, 17) (13, 14) 1.73 0.0 4.19 2.754857 2.660409 2.945727
11 (16, 17) (13, 14) 1.73 0.0 4.19 2.26829 2.168391 2.401991
12 (26,) (28,) 2.31 0.0 2.54 2.175859 2.089499 2.861964
13 (28,) (35, 36, 37, 39, 40, 41) 3.23 0.0 6.99 2.887566 2.379667 4.357423
13 (28,) (35, 36, 37, 39, 40, 41) 3.23 0.0 6.99 3.189468 2.48276 4.4243
13 (28,) (35, 36, 37, 39, 40, 41) 3.23 0.0 6.99 3.13261 2.46419 4.411258
13 (28,) (35, 36, 37, 39, 40, 41) 3.23 0.0 6.99 2.745668 2.323668 4.360998
13 (28,) (35, 36, 37, 39, 40, 41) 3.23 0.0 6.99 5.407195 3.710022 5.537008
13 (28,) (35, 36, 37, 39, 40, 41) 3.23 0.0 6.99 2.763082 2.329042 4.366113
14 (45,) (50,) 2.75 0.0 3.02 2.686155 2.405164 3.455608
15 (45,) (49,) 3.02 0.0 3.32 2.367686 2.206339 3.104622
16 (47,) (50,) 2.26 0.0 2.49 2.531027 2.363641 2.998021
17 (47,) (49,) 2.11 0.0 2.32 2.641353 2.412435 2.978214
18 (50,) (54, 55) 2.60 0.0 6.06 2.681431 2.28373 3.671219
18 (50,) (54, 55) 2.60 0.0 6.06 2.687276 2.285331 3.65452
19 (49,) (54, 55) 3.31 0.0 7.11 2.756263 2.3283 3.67308
19 (49,) (54, 55) 3.31 0.0 7.11 2.800008 2.340225 3.663654
20 (59,) (63, 64) 2.49 0.0 3.98 3.148151 2.679068 3.707264
20 (59,) (63, 64) 2.49 0.0 3.98 3.114627 2.624638 3.578109
21 (61,) (63, 64) 2.08 0.0 3.47 2.51486 2.348111 2.788316
21 (61,) (63, 64) 2.08 0.0 3.47 2.346134 2.232595 2.913819
# dis_0 = NOE_dist[0]
# dis_0.tofile('/Users/daniel/Projects/research/03_macroconf/libs/pyreweight/test-data/distances.dat', sep="\n")
if snakemake.params.method != "cMD":
    if not multiple:
        pmf_plot.suptitle('PMF plots. PMF vs. distance')
        pmf_plot.tight_layout()
        pmf_plot.savefig(snakemake.output.noe_pmf)
        fig = pmf_plot
    else:
        # save to image data
        io_cis = io.BytesIO()
        io_trans = io.BytesIO()
        if len(cis) > 0:
            pmf_plot_cis.savefig(io_cis, format='raw', dpi = pmf_plot_cis.dpi)
        if len(trans) > 0:
            pmf_plot_trans.savefig(io_trans, format='raw', dpi = pmf_plot_trans.dpi)
        
        if len(cis) > 0:
            io_cis.seek(0)
            img_cis = np.reshape(np.frombuffer(io_cis.getvalue(), dtype=np.uint8), newshape=(int(pmf_plot_cis.bbox.bounds[3]), int(pmf_plot_cis.bbox.bounds[2]), -1))
            io_cis.close()
        
        if len(trans) > 0:
            io_trans.seek(0)
            img_trans = np.reshape(np.frombuffer(io_trans.getvalue(), dtype=np.uint8), newshape=(int(pmf_plot_trans.bbox.bounds[3]), int(pmf_plot_trans.bbox.bounds[2]), -1))
            io_trans.close()

        fig, axs = plt.subplots(2,1)
        fig.set_size_inches(16,30)
        if len(cis) > 0:
            axs[0].imshow(img_cis)
            axs[0].axis('off')
            axs[0].set_title('cis')
        if len(trans) > 0:
            axs[1].imshow(img_trans)
            axs[1].set_title('trans')
            axs[1].axis('off')
        #fig.suptitle('PMF plots. PMF vs. distance')
        fig.tight_layout()
        fig.savefig(snakemake.output.noe_pmf)
else:
    fig, ax = plt.subplots()
    ax.text(0.5, 0.5, 'not applicable.')
    fig.savefig(snakemake.output.noe_pmf)
fig
../_images/ed6dd3148ef9b069_GaMD_processed_69_0.png
from scipy import stats
from sklearn import metrics, utils
if multiple:
    NOE_stat_cis = pd.DataFrame(columns=['stat', 'value', 'up', 'low'])
    NOE_stat_trans = pd.DataFrame(columns=['stat', 'value', 'up', 'low'])
else:
    NOE_stats = pd.DataFrame(columns=['stat', 'value', 'up', 'low'])
if multiple:
    if len(cis) > 0:
        NOE_cis_t = NOE_cis

        NOE_cis_t['dev'] = NOE_cis_t['md'] - np.abs(NOE_cis_t['NMR exp'])
        NOE_cis_t['abs_dev'] = np.abs(NOE_cis_t['md'] - np.abs(NOE_cis_t['NMR exp']))

        NOE_cis_t = NOE_cis_t.sort_values('abs_dev',ascending=True)
        NOE_cis_t.index = NOE_cis_t.index.astype(int)
        NOE_cis_t = NOE_cis_t[~NOE_cis_t.index.duplicated(keep='first')].sort_index(kind='mergesort')
        
        NOE_cis_t = NOE_cis_t.dropna()
    if len(trans) > 0:
        NOE_trans_t = NOE_trans

        NOE_trans_t['dev'] = NOE_trans_t['md'] - np.abs(NOE_trans_t['NMR exp'])
        NOE_trans_t['abs_dev'] = np.abs(NOE_trans_t['md'] - np.abs(NOE_trans_t['NMR exp']))

        NOE_trans_t = NOE_trans_t.sort_values('abs_dev',ascending=True)
        NOE_trans_t.index = NOE_trans_t.index.astype(int)
        NOE_trans_t = NOE_trans_t[~NOE_trans_t.index.duplicated(keep='first')].sort_index(kind='mergesort')
        
        NOE_trans_t = NOE_trans_t.dropna()
    
    # Merge cis/trans ? Or separate?
    # Separate:
else:
    # Remove duplicate values (keep value closest to experimental value)
    NOE_test = NOE
    if (NOE_test['NMR exp'].to_numpy() == 0).all():
        # if all exp values are 0: take middle between upper / lower bound as reference value
        NOE_test['NMR exp'] = (NOE_test['upper bound'] + NOE_test['lower bound']) * 0.5
    NOE_test['dev'] = NOE_test['md'] - np.abs(NOE_test['NMR exp'])
    NOE_test['abs_dev'] = np.abs(NOE_test['md'] - np.abs(NOE_test['NMR exp']))

    NOE_test = NOE_test.sort_values('abs_dev',ascending=True)
    NOE_test.index = NOE_test.index.astype(int)
    NOE_test = NOE_test[~NOE_test.index.duplicated(keep='first')].sort_index(kind='mergesort')
    
    # drop NaN values:
    NOE_test = NOE_test.dropna()
    NOE_test
# Compute statistics
# CI via bootstrapping. Parameters:
n_bootstrap = 10000
CI = 0.95
def compute_MAE(exp, md, CI=0.95, n_bootstrap=10000):
    MAE = metrics.mean_absolute_error(exp, md)

    # bootstrap CI's
    # Compute errors/CI:
    boot_stats = []

    for i in range(n_bootstrap):
        resample = utils.resample(exp.to_list(),md.to_list())

        #perform statistical eval
        result = metrics.mean_absolute_error(resample[0], resample[1])
        boot_stats.append(result)

    # Confidence intervals

    p = ((1.0-CI)/2.0) * 100
    lower = max(0.0, np.percentile(boot_stats, p))
    p = (CI+((1.0-CI)/2.0)) * 100
    upper = np.percentile(boot_stats, p)
    return MAE, upper, lower
# MAE - Mean absolute error
if multiple:
    if len(cis) > 0:
        # cis
        MAE, upper, lower = compute_MAE(NOE_cis_t['NMR exp'], NOE_cis_t['md'])
        append = {'stat': 'MAE', 'value': MAE, 'up': upper, 'low': lower}
        NOE_stat_cis = NOE_stat_cis.append(append, ignore_index=True)
    if len(trans) > 0:
        # trans
        MAE, upper, lower = compute_MAE(NOE_trans_t['NMR exp'], NOE_trans_t['md'])
        append = {'stat': 'MAE', 'value': MAE, 'up': upper, 'low': lower}
        NOE_stat_trans = NOE_stat_trans.append(append, ignore_index=True)
else:
    MAE, upper, lower = compute_MAE(NOE_test['NMR exp'], NOE_test['md'])
    append = {'stat': 'MAE', 'value': MAE, 'up': upper, 'low': lower}
    NOE_stats = NOE_stats.append(append, ignore_index=True)
def compute_MSE(dev, CI=0.95, n_boostrap=10000):
    # MSE - Mean signed error
    MSE = dev.mean()

    # bootstrap CI's
    # Compute errors/CI:
    boot_stats = []

    for i in range(n_bootstrap):
        resample = utils.resample(dev)

        #perform statistical eval
        result = resample.mean()
        boot_stats.append(result)

    # Confidence intervals

    p = ((1.0-CI)/2.0) * 100
    lower = max(0.0, np.percentile(boot_stats, p))
    p = (CI+((1.0-CI)/2.0)) * 100
    upper = np.percentile(boot_stats, p)
    #plt.hist(boot_stats)
    return MSE, upper, lower
# MSE - Mean signed error
if multiple:
    if len(cis) > 0:
        # cis
        MSE, upper, lower = compute_MSE(NOE_cis_t['dev'])
        append = {'stat': 'MSE', 'value': MSE, 'up': upper, 'low': lower}
        NOE_stat_cis = NOE_stat_cis.append(append, ignore_index=True)
    if len(trans) > 0:
        # trans
        MSE, upper, lower = compute_MSE(NOE_trans_t['dev'])
        append = {'stat': 'MSE', 'value': MSE, 'up': upper, 'low': lower}
        NOE_stat_trans = NOE_stat_trans.append(append, ignore_index=True)
else:
    MSE, upper, lower = compute_MSE(NOE_test['dev'])

    append = {'stat': 'MSE', 'value': MSE, 'up': upper, 'low': lower}
    NOE_stats = NOE_stats.append(append, ignore_index=True)
def compute_RMSD(exp, md, CI=0.95, n_bootstrap=10000):
    # RMSD - root mean squared error
    RMSD = metrics.mean_squared_error(exp, md, squared=False)

    # bootstrap CI's
    # Compute errors/CI:
    boot_stats = []

    for i in range(n_bootstrap):
        resample = utils.resample(exp.to_list(), md.to_list())

        #perform statistical eval
        result = metrics.mean_squared_error(resample[0], resample[1], squared=False)
        boot_stats.append(result)

    # Confidence intervals

    p = ((1.0-CI)/2.0) * 100
    lower = max(0.0, np.percentile(boot_stats, p))
    p = (CI+((1.0-CI)/2.0)) * 100
    upper = np.percentile(boot_stats, p)
    return RMSD, upper, lower
# RMSD - root mean squared error
if multiple:
    if len(cis) > 0:
        # cis
        RMSD, upper, lower = compute_RMSD(NOE_cis_t['NMR exp'], NOE_cis_t['md'])
        append = {'stat': 'RMSD', 'value': RMSD, 'up': upper, 'low': lower}
        NOE_stat_cis = NOE_stat_cis.append(append, ignore_index=True)

    if len(trans) > 0:
        # trans
        RMSD, upper, lower = compute_RMSD(NOE_trans_t['NMR exp'], NOE_trans_t['md'])
        append = {'stat': 'RMSD', 'value': RMSD, 'up': upper, 'low': lower}
        NOE_stat_trans = NOE_stat_trans.append(append, ignore_index=True)
else:
    RMSD, upper, lower = compute_RMSD(NOE_test['NMR exp'], NOE_test['md'])
    append = {'stat': 'RMSD', 'value': RMSD, 'up': upper, 'low': lower}
    NOE_stats = NOE_stats.append(append, ignore_index=True)
def compute_pearsonr(exp, md, CI=0.95, n_bootstrap=10000):
    # pearsonr
    pearsonr = stats.pearsonr(exp, md)[0]
    #slope, intercept, pearsonr, p_value, std_err = scipy.stats.linregress(exp.to_list(), md.to_list())

    # bootstrap CI's
    # Compute errors/CI:
    boot_stats = []

    for i in range(n_bootstrap):
        resample = utils.resample(exp.to_list(), md.to_list())

        #perform statistical eval
        result = stats.pearsonr(resample[0], resample[1])
        #_, _2, result, _3, _4 = scipy.stats.linregress(resample[0], resample[1])
        boot_stats.append(result[0])
        #boot_stats.append(result)

    # Confidence intervals

    p = ((1.0-CI)/2.0) * 100
    lower = max(0.0, np.percentile(boot_stats, p))
    p = (CI+((1.0-CI)/2.0)) * 100
    upper = min(1.0, np.percentile(boot_stats, p))
    return pearsonr, upper, lower
if multiple:
    if len(cis) > 0:
        # cis
        pearsonr, upper, lower = compute_pearsonr(NOE_cis_t['NMR exp'], NOE_cis_t['md'])
        append = {'stat': 'pearsonr', 'value': pearsonr, 'up': upper, 'low': lower}
        NOE_stat_cis = NOE_stat_cis.append(append, ignore_index=True)
    
    if len(trans) > 0:
        # trans
        pearsonr, upper, lower = compute_pearsonr(NOE_trans_t['NMR exp'], NOE_trans_t['md'])
        append = {'stat': 'pearsonr', 'value': pearsonr, 'up': upper, 'low': lower}
        NOE_stat_trans = NOE_stat_trans.append(append, ignore_index=True)
else:
    # pearsonr
    pearsonr, upper, lower = compute_pearsonr(NOE_test['NMR exp'], NOE_test['md'])
    append = {'stat': 'pearsonr', 'value': pearsonr, 'up': upper, 'low': lower}
    NOE_stats = NOE_stats.append(append, ignore_index=True)
def compute_kendalltau(exp, md, CI=0.95, n_bootstrap=10000):
    kendalltau = stats.kendalltau(exp, md)[0]

    # bootstrap CI's
    # Compute errors/CI:
    boot_stats = []

    for i in range(n_bootstrap):
        resample = utils.resample(exp.to_list(), md.to_list())

        #perform statistical eval
        result = stats.kendalltau(resample[0], resample[1])
        boot_stats.append(result[0])

    # Confidence intervals

    p = ((1.0-CI)/2.0) * 100
    lower = max(0.0, np.percentile(boot_stats, p))
    p = (CI+((1.0-CI)/2.0)) * 100
    upper = min(1.0, np.percentile(boot_stats, p))
    return kendalltau, upper, lower
if multiple:
    if len(cis) > 0:
        #cis
        kendalltau, upper, lower = compute_kendalltau(NOE_cis_t['NMR exp'], NOE_cis_t['md'])
        append = {'stat': 'kendalltau', 'value': kendalltau, 'up': upper, 'low': lower}
        NOE_stat_cis = NOE_stat_cis.append(append, ignore_index=True)
    
    if len(trans) > 0:
        #trans
        kendalltau, upper, lower = compute_kendalltau(NOE_trans_t['NMR exp'], NOE_trans_t['md'])
        append = {'stat': 'kendalltau', 'value': kendalltau, 'up': upper, 'low': lower}
        NOE_stat_trans = NOE_stat_trans.append(append, ignore_index=True)
else:
    # Kendall's tau
    kendalltau, upper, lower = compute_kendalltau(NOE_test['NMR exp'], NOE_test['md'])
    append = {'stat': 'kendalltau', 'value': kendalltau, 'up': upper, 'low': lower}
    NOE_stats = NOE_stats.append(append, ignore_index=True)
def compute_chisquared(exp, md, CI=0.95, n_bootstrap=10000):
    chi_elements = (md - exp)**2 / exp
    chisq = chi_elements.sum()

    # bootstrap CI's
    # Compute errors/CI:
    boot_stats = []

    for i in range(n_bootstrap):
        resample = utils.resample(chi_elements)

        #perform statistical eval
        result = resample.sum()
        boot_stats.append(result)

    # Confidence intervals

    p = ((1.0-CI)/2.0) * 100
    lower = max(0.0, np.percentile(boot_stats, p))
    p = (CI+((1.0-CI)/2.0)) * 100
    upper = np.percentile(boot_stats, p)
    return chisq, upper, lower
# Chi squared
if multiple:
    if len(cis) > 0:
        # cis
        chisq, upper, lower = compute_chisquared(NOE_cis_t['NMR exp'], NOE_cis_t['md'])
        append = {'stat': 'chisq', 'value': chisq, 'up': upper, 'low': lower}
        NOE_stat_cis = NOE_stat_cis.append(append, ignore_index=True)
    
    if len(trans) > 0:
        #trans
        chisq, upper, lower = compute_chisquared(NOE_trans_t['NMR exp'], NOE_trans_t['md'])
        append = {'stat': 'chisq', 'value': chisq, 'up': upper, 'low': lower}
        NOE_stat_trans = NOE_stat_trans.append(append, ignore_index=True)
else:
    chisq, upper, lower = compute_chisquared(NOE_test['NMR exp'], NOE_test['md'])
    append = {'stat': 'chisq', 'value': chisq, 'up': upper, 'low': lower}
    NOE_stats = NOE_stats.append(append, ignore_index=True)
if multiple:
    if len(cis) > 0 and len(trans) > 0:
        NOE_stat_combined = {'cis': NOE_stat_cis.to_dict(), 'trans': NOE_stat_trans.to_dict()}
    elif len(cis) > 0 and len(trans) <= 0:
        NOE_stat_combined = {'cis': NOE_stat_cis.to_dict()}
    else:
        NOE_stat_combined = {'trans': NOE_stat_trans.to_dict()}
    src.utils.json_dump(snakemake.output.noe_stats, NOE_stat_combined)
else:
    NOE_stats.to_json(snakemake.output.noe_stats)
    NOE_stats
if multiple:
    fig, axs = plt.subplots(2,1)
    if len(cis) > 0:
        # cis
        axs[0].scatter(NOE_cis_t['NMR exp'], NOE_cis_t['md'])
        axs[0].set_ylabel('MD')
        axs[0].set_xlabel('Experimental NOE value')
        axs[0].axline((1.5, 1.5), slope=1, color="black")
        axs[0].set_title("Experimental vs MD derived NOE values - cis")
    
    if len(trans) > 0:
        #trans
        axs[1].scatter(NOE_trans_t['NMR exp'], NOE_trans_t['md'])
        axs[1].set_ylabel('MD')
        axs[1].set_xlabel('Experimental NOE value')
        axs[1].axline((1.5, 1.5), slope=1, color="black")
        axs[1].set_title("Experimental vs MD derived NOE values - trans")
    fig.savefig(snakemake.output.noe_stat_plot)
else:
    plt.scatter(NOE_test['NMR exp'], NOE_test['md'])
    if snakemake.params.method != "cMD":
        plt.scatter(NOE_test['NMR exp'], NOE_test['upper'], marker='_')
        plt.scatter(NOE_test['NMR exp'], NOE_test['lower'], marker='_')
    plt.ylabel('MD')
    plt.xlabel('Experimental NOE value')
    plt.axline((1.5, 1.5), slope=1, color="black")
    plt.title("Experimental vs MD derived NOE values")
    plt.savefig(snakemake.output.noe_stat_plot)
../_images/ed6dd3148ef9b069_GaMD_processed_87_0.png
# is the mean deviation significantly different than 0? if pvalue < 5% -> yes! We want: no! (does not deviate from exp. values)
if multiple:
    if len(cis) > 0:
        print(stats.ttest_1samp(NOE_cis_t['dev'], 0.0))
    if len(trans) > 0:
        print(stats.ttest_1samp(NOE_trans_t['dev'], 0.0))
else:
    print(stats.ttest_1samp(NOE_test['dev'], 0.0))
Ttest_1sampResult(statistic=1.9197273838074582, pvalue=0.06858066325619995)
if multiple:
    if len(cis) > 0:
        print(stats.describe(NOE_cis_t['dev']))
    if len(trans) > 0:
        print(stats.describe(NOE_trans_t['dev']))
else:
    print(stats.describe(NOE_test['dev']))
DescribeResult(nobs=22, minmax=(-0.6523140238434166, 1.9240324063195944), mean=0.2584210707182275, variance=0.3986570090994572, skewness=0.6067424009031085, kurtosis=0.3972316397467406)
print('Done')
Done